## steps
# extract data on debtor-creditor-instrument type level
# import debtor dataset
# inner join between the two datasets to remove redundant debtors and import size flag + nace_code
# exclude only observations that do not have any amount reported
# missing data on creditor-loan level is excluded which ensures that we don't have any missing amounts

# import libraries
import pandas as pd
import pyodbc
import os

path_data = r'P:\ECB business areas\DGR\Staff\papouts\Research\Active\Firm Heterogeneities\Final datasets'
if not os.path.isdir(path_data):
    os.makedirs(path_data)


sql_conn = pyodbc.connect('DSN=DISC DP Impala 64bit',autocommit=True)

query = """

 SELECT DISTINCT cntry_dbtr, 
                 dbtr_id,
                 crdtr_id,
                 nace_code,
                 typ_instr_corr,
                 outst_amnt_byinstr,
                 outst_amnt_per_crdtr,
                 outst_amnt_crdtr_loan,
                 outst_amnt_ttl, 
                 outst_amnt_ttl_max,
                 outst_amnt_ttl_max/outst_amnt_ttl AS mb_shr,
                 CASE WHEN (outst_amnt_per_crdtr - outst_amnt_ttl_max) = 0 THEN 1 ELSE 0 END AS mb_flag,
                 CASE WHEN (outst_amnt_per_crdtr - outst_amnt_ttl_max) = 0 THEN outst_amnt_crdtr_loan ELSE NULL END AS mb_ona_loan
                 
 
 FROM (
 SELECT          cntry_dbtr, 
                 dbtr_id, 
                 crdtr_id,
                 cntrct_id,
                 instrmnt_id,
                 typ_instr_corr,
                 nace_code,
                 otstndng_nmnl_amnt_cv_c,
                 MAX(CASE WHEN default_aggr_dbtr_lvl = -99 AND default_dbtr_lvl = -99 THEN -99
                      WHEN (default_aggr_dbtr_lvl = 0 AND default_dbtr_lvl = -99) OR
                           (default_aggr_dbtr_lvl = 0 AND default_dbtr_lvl = 0) OR
                           (default_aggr_dbtr_lvl = -99 AND default_dbtr_lvl = 0) THEN 0
                      ELSE 1 END) OVER (PARTITION BY cntry_dbtr, dbtr_id) AS dflt_stts_debtor,
                 outst_amnt_byinstr,
                 outst_amnt_per_crdtr,
                 outst_amnt_crdtr_loan,
                 outst_amnt_ttl,
                 MAX(outst_amnt_per_crdtr) OVER (PARTITION BY cntry_dbtr, dbtr_id) AS outst_amnt_ttl_max

                 
                 
 FROM (
 SELECT          cntry_dbtr,
                 dbtr_id, 
                 crdtr_id,
                 cntrct_id,
                 instrmnt_id,
                 typ_instr_corr,
                 t2.nace_code,
                 otstndng_nmnl_amnt_cv_c,
                 MAX(dflt_stts_instr_flag) OVER (PARTITION BY cntry_dbtr, dbtr_id) AS default_aggr_dbtr_lvl,
                 MAX(dflt_stts_debtor_flag) OVER (PARTITION BY cntry_dbtr, dbtr_id) AS default_dbtr_lvl,
                 SUM(otstndng_nmnl_amnt_cv_c) OVER (PARTITION BY cntry_dbtr, dbtr_id, typ_instr_corr) AS outst_amnt_byinstr,
                 SUM(otstndng_nmnl_amnt_cv_c) OVER (PARTITION BY cntry_dbtr, dbtr_id,  crdtr_id) AS outst_amnt_per_crdtr,
                 SUM(otstndng_nmnl_amnt_cv_c) OVER (PARTITION BY cntry_dbtr, dbtr_id,  crdtr_id, typ_instr_corr) AS outst_amnt_crdtr_loan,               
                 SUM(otstndng_nmnl_amnt_cv_c) OVER (PARTITION BY cntry_dbtr, dbtr_id ) AS outst_amnt_ttl
                 

FROM            lab_org_dgr_fir.ac_instr_201912_kk t1
LEFT JOIN       lab_org_dgr_fir.ac_dbtr_201912_kk t2
ON              t1.dbtr_id=t2.entty_riad_id

WHERE           nvl(otstndng_nmnl_amnt_cv_c,0) + nvl(off_blnc_sht_amnt_cv,0) + nvl(cmmtmnt_incptn_cv,0) > 0                              
                AND prjct_fnnc_ln != 1
      
          ) tbl
 
      
                                ) tbl2
                                                
WHERE    dflt_stts_debtor = 0 
        and outst_amnt_crdtr_loan IS NOT NULL 
                                     
                """
                
dd_instr = pd.read_sql(query, sql_conn)
sql_conn.close()

# import final debtors dataset
dbtr_f = pd.read_stata(path_data + r'\dbtr_combined_dt.dta')

# merge datasets and take size flag
dd_merged = dd_instr.merge(dbtr_f[['cntry', 'entty_riad_id','sz_f']], how='left', left_on=['cntry_dbtr', 'dbtr_id'], right_on=['cntry', 'entty_riad_id'])

# final dataset
dd_final = dd_merged[dd_merged.sz_f.isna() == False].drop(columns=['cntry', 'entty_riad_id'])

# save dataset
dd_final.to_stata(path_data+r'\instr_lvl_mb_dt_f.dta', write_index=False)
